#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
basic_land.py
--------------

This script assigns land use types to parcels for 

@author: hackettl
"""

import geopandas as gpd
import pandas as pd
import numpy as np
from pathlib import Path
import re

landuse_dir = '/Users/hackettl/Library/CloudStorage/Box-Box/Land use replication/'
meteredproj = 6933

#%% Read in data
# shapes to be assigned a land use 
parcels = gpd.read_file(landuse_dir+"RawData/Pajaro_Parcel_Project.shp")

# parcel IDs
parcelnums = pd.read_csv(landuse_dir+'out/parcel_IDs.csv')

# land use geometries for each year
landuse = {}
for file in Path(landuse_dir+'RawData/').glob("*.shp"):
    file_str = str(file)
    if "LandUse" in file_str:
        # print(file_str)
        year = re.search(r'\d+', file_str).group()
        print(f'Reading in {year}: {file_str[len(landuse_dir):]}')
        landuse[year] = gpd.read_file(file)

# fix one variable name for consistency across years
landuse['2015']['Land_Use'] = landuse['2015'].F2015Land_
# fix land use ID variable name for consistency across years
for k, v in landuse.items():
    landuse[k] = v.rename(
        columns = {
            'LU_Num_Cod' : 'land_code',
            'Land_Use_C' : 'land_code',
            'F2015LU_ID' : 'land_code'
            })
    
# make a dictionary of land codes - uses for each year
landcodes = {k : v[['land_code', 'Land_Use']].drop_duplicates().reset_index(drop = True) for k, v in landuse.items()}

# dissolve by land types for each year
landuse_dissolved = {k : v.dissolve(by='Land_Use') for k, v in landuse.items()}

# clean up
del file
del file_str
del year
del k
del v

#%% Define function for assigning land use type to parcels 

def area_land_use(parcel, land_uses):
    '''
    For a parcel, get the area of intersection with each land use geometry in 
    a data set. 
    
    Returns: pd.Series of areas (m2)
    '''
    try:
        # try and get area; if this doesnt work, use buffer (slower)
        return land_uses.intersection(parcel).area
    except:
        print("\t\t Geometry issue! Trying with buffer...")
        return land_uses.buffer(0).intersection(parcel).area
        

def apply_area_land_use(parcels, data, k=None):
    ''' 
    Apply area_land_use to each parcel in parcels and include parcel ID in 
    resulting pd.DataFrame
    
    Returns: pd.DataFrame of parcel IDs and areas for each land use type
    '''
    if k is not None:
        print(f'\t Working on {k}...')
    # make sure CRS are aligned 
    assert(parcels.crs == data.crs), 'CRS not aligned!'
    # get areas for each land use type
    dt = parcels.geometry.apply(lambda x: area_land_use(x, data)).apply(pd.Series)
    # add on parcel ID
    dt = dt.join(parcels[['ID']])
    return dt

# reshape dataframes to long
def melt_and_rename(dt):
    ''' 
    Reshape data wide to long and rename columns to be informative. 
    
    Returns: pd.DataFrame in long panel format 
    '''
    # reshape long
    dt_long = dt.melt(id_vars=['ID'])
    # fix column names
    dt_long.rename(columns = {'variable' : 'Land_Use', 'value' : 'area_m2'},
                   inplace = True)
    return dt_long

def alltogethernow(parcels, landuse):
    ''' 
    Pulls together all steps of the process:
        1. Harmonize CRS of the inputs to be a metered projection for accurate areas
        2. Get areas of intersection of parcels with land use type
        3. Reshape wide to long 
        4. Add land use codes to land use descriptions
        
    Returns: dictionary with keys = years (int), values = pd.DataFrame
    '''
    # 1 - harmonize CRS to be a metered projection (handy for areas later)
    print("Harmonizing CRS...")
    parcels = parcels.to_crs(meteredproj)
    land_use = {}
    for k, v in landuse.items():
        land_use[k] = v.to_crs(meteredproj)
    
    # 2 - apply area calculation to parcels
    print("Calculating areas... ")
    landuse_df = {k : apply_area_land_use(parcels, v, k) for k, v in land_use.items()}  
    # 3 - reshape data 
    print("Reshaping...")
    landuse_df = {k : melt_and_rename(v) for k, v in landuse_df.items()}
    # verified that the sum of intersected areas is weakly smaller than the total
    # area of the parcel. Some of these are much smaller, indicating that the 
    # parcel was not fully covered by land use data. 

    # 4 - add land use code, add year as variable
    print("Adding codes...")
    for k, v in landuse_df.items():
        landuse_df[k] = v.merge(landcodes[k], how = 'left')
        landuse_df[k]['year'] = int(k)

        
    return landuse_df

#%% apply area calculation 

print('Starting on calculating areas...')
landuse_areas = alltogethernow(parcels, landuse_dissolved)
print('Finished calculating areas!')

#%% Step 2: Replicate land_to_parcel_processing

# first append data together
dt = pd.concat(landuse_areas.values(), axis = 0).reset_index(drop = True)

# 1. Drop zero-area intersections
dt = dt[dt.area_m2 > 0].copy()

# 2. get area in acres
dt['acres'] = dt.area_m2*0.000247105

# 3. drop govt land; for this need the APN variable
dt = dt.merge(parcels[['ID', 'APN']])
dt = dt[dt.APN != 'GOVT LAND']
dt['apn'] = pd.to_numeric(dt.APN)

# 4. Drop mini parcels 
dt = dt[dt.acres >= 2]

# 5. Add on parcelnum
dt['ID'] = pd.to_numeric(dt.ID)
dt = dt.merge(parcelnums)

# some renames, drop unnecessary
dt.rename(columns = {'FID_Pajaro_Parcel_Project' : 'parcelnum',
                     'Land_Use' : 'land_use'},
          inplace = True)
dt.drop(columns=['ID', 'area_m2', 'APN'], inplace=True)

# # # # save # # # # # # # # # # #  # # # # # # # # # # # # # # # # # # 
dt.to_csv(landuse_dir+'out/parcel_landuse.csv',index = False)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

# 6. Synthesize small categories
dt.loc[dt.land_code == 33, 'land_use'] = 'Cover Crop'
dt.loc[dt.land_code == 33, 'land_code'] = 0

apples = (dt.land_code==7) & (dt.land_use == "Deciduous (Non-Apple Orchards)")
dt.loc[apples, 'land_code'] = 10

dt.loc[dt.land_code == 16, 'land_code'] = 10

other = (dt.land_code == 0) & (dt.land_use == 'Other')
dt.loc[other, 'land_code'] = 10

dt.loc[dt.land_code == 10, 'land_use'] = 'Ag. Unknown'

# 7. Drop some unneeded categories
drop1 = (dt.land_code == 2) & (dt.land_use == 'Vegetable Row Crop')
dt = dt[~drop1].reset_index(drop = True)

drop2 = (dt.land_code == 1) & (dt.land_use == 'Fallow')
dt = dt[~drop2].reset_index(drop = True)

# 8. Remap land use names
use_map = {
    3 :  "Raspberries and/or Blackberries",
    4 :  "Blueberries",
    5 :  "Vine/Grapes",
    6 :  "Artichokes",
    7 :  "Apple Orchards" ,
    8 :  "Nurseries/Flowers/Tropical Plants",
    11 : "Residential" ,
    12 : "Industrial",
    0  : "Fallow" 
    }

dt['land_use'] = dt.land_code.map(use_map)

# 8a - drop cover crops, other
drop3 = (dt.land_code == 33) | (dt.land_code == 9)
dt = dt[~drop3].reset_index(drop = True)

# 9. Collapse acres by {parcelnum,year,land_code}
acres = dt.groupby(['parcelnum', 'year', 'land_code']).acres.sum().reset_index()

# reshape wide 
acres_wide = acres.pivot(index=['parcelnum', 'year'], columns='land_code', values='acres').reset_index()
# NA categories are 0's
acres_wide.fillna(0, inplace=True)

# 9. Some totals
acres_wide['vegstraw']    = acres_wide[1] + acres_wide[2]
acres_wide['otherag']     = acres_wide[[3, 4, 5, 6, 7, 8, 10]].sum(axis=1)
acres_wide['acres_irr']   = acres_wide.vegstraw + acres_wide.otherag
acres_wide['acres_agtot'] = acres_wide[0] + acres_wide.acres_irr
acres_wide['acres_nonag'] = acres_wide[[11, 12, 13]].sum(axis=1)
acres_wide['acres_tot']   = acres_wide[[0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13]].sum(axis=1)
acres_wide['frac_irr']    = acres_wide.acres_irr/acres_wide.acres_agtot

# some renaming
acres_wide.columns = ['acres'+str(int(c)) if type(c) is float else c for c in acres_wide.columns]

# # # # save # # # # # # # # # # #  # # # # # # # # # # # # # # # # # # 
acres_wide.to_csv(landuse_dir+'out/parcel_landuse_wide.csv', index = False)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

# average parcel size over the years
fixedp = acres_wide.groupby('parcelnum').acres_tot.mean().reset_index()
fixedp.rename(columns={'acres_tot' : 'parcel_size_fixed'}, inplace=True)
acres_wide = acres_wide.merge(fixedp)

# # # # save # # # # # # #  # # # # # # # # # # # # # # # # # # # # # # 
acres_wide.to_csv(landuse_dir+'out/parcel_landuse_watermerge.csv', index = False)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

